package com.sweetdream.dataset.wordcount

import org.apache.flink.api.common.operators.Order
import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.api.scala.ExecutionEnvironment
import org.apache.flink.api.scala._

/**
 * Title: 词频统计
 * Description:
 * Date 2020/12/16
 */
object WordCount {
  def main(args: Array[String]) {
    // 1. env
    val env = ExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // make parameters available in the web interface
    val params: ParameterTool = ParameterTool.fromArgs(args)
    env.getConfig.setGlobalJobParameters(params)

    // 2. source
    val text =
      if (params.has("input")) {
        env.readTextFile(params.get("input"))
      } else {
        println("Executing WordCount example with default input data set.")
        println("Use --input to specify file input.")
        env.fromCollection(WordCountData.WORDS)
      }

    // 3. transformation
    val counts = text
      .flatMap(_.toLowerCase.split("\\W+"))
      .filter(_.nonEmpty)
      .map((_, 1))
      .groupBy(0)
      .sum(1)
//      .sortPartition(_._1, Order.ASCENDING)


    // 4. sink
    if (params.has("output")) {
      counts.writeAsCsv(params.get("output"), "\n", " ")
      env.execute("Scala WordCount Example")
    } else {
      println("Printing result to stdout. Use --output to specify output path.")
      counts.print()
    }
  }
}
